Data clean up

# Review imported Data
head(Data)
##   UnitNumber   LeaseEnd Term LeaseEndDate
## 1     08N107   1/1/2022   60   2022-01-01
## 2     08N829   3/1/2021   84   2021-03-01
## 3     08N108 12/31/2021   60   2021-12-31
## 4     08N109   1/1/2022   60   2022-01-01
## 5     08N110   1/1/2022   60   2022-01-01
## 6     08N111   1/1/2022   60   2022-01-01
# Standardize End of Lease -- Some leases have end of lease date as the 30th/31st, this puts all units on the 1st
Data = Data %>%
  mutate(
    FirstOfMonth = floor_date(LeaseEndDate, "month")
    ,LeaseEndFix = if_else(FirstOfMonth == LeaseEndDate
                           ,LeaseEndDate
                           ,LeaseEndDate + 1
    )
  )

# Holding place to adjust
Data$DateIncrease = 0

# End result method check
Data$NewEnd = Data$LeaseEndFix %m+% months(Data$DateIncrease)

# New lease to replace -- Holding place is all 7-year leases
Data$NewLease = 84
Data$SecondEnd = Data$NewEnd %m+% months(Data$NewLease)

# Base level variance
BaseLevel = var(table(Data$SecondEnd))

# Cycle review
plot_ly(alpha = .6) %>%
  add_histogram(x = Data$LeaseEndFix, name = "First Returns") %>%
  add_histogram(x = Data$SecondEnd, name = "Second Returns") %>%
  layout(barmode = "overlay")
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
nSims = 10000


Units = length(Data$UnitNumber)

Loop Method

# Start Timestamp
StartLoop = Sys.time()

# Initialize Best Run variable
BestRunModel1 = 500000

# Define random addition for number of months to hold lease for simulation
ExtensionReturns = function(x){
  ifelse(x == 0
         ,sample(c(0:6), 1, replace = TRUE, prob = c(.50,rep(1/6*.50, 6))) #Input will be 84-84 to be a zero
         ,sample(c(0,1:x), 1, replace = TRUE, prob = c(.50,rep(1/x*.50, x))) #Input will be 84-x, where x is <84 so it will be >1 response
  )
}

# Define new lease replacement
ExtensionNewLease = function(x){
  # Lease Options: 60, 66, 72, 78, 84
  # We'll try to keep the options low, so .05 for each besides 84
  sample(c(60, 66, 72, 78, 84), 1, replace = TRUE, prob = c(.05,.05,.05,.05,.8))
}

# define process for adding months and determining variance
SimRun = function(){
  x = Data
  
  x = x %>%
    rowwise() %>%
    mutate(DateIncrease = ExtensionReturns((84-Term)) # Turn-in extension
           ,NewLease = ExtensionNewLease() # Replacement leases
           )
  
  # Determine new date to turn in leases
  x$NewEnd = x$LeaseEndFix %m+% months(x$DateIncrease)
  
  # Determine end date for new leases
  x$SecondEnd = x$NewEnd %m+% months(x$NewLease)
  
  # 
  a = var(table(x$SecondEnd))
  if(a < BestRunModel1){
    return(x)
  }
   
}

for(i in 1:nSims){
  y = SimRun()
  if(length(y) > 0){
    DataModel1 = y
    BestRunModel1 = var(table(DataModel1$SecondEnd))
  }
}

# Cycle review
plot_ly(alpha = .6) %>%
  add_histogram(x = Data$SecondEnd, name = "Second Returns") %>%
  add_histogram(x = DataModel1$SecondEnd, name = "Model 1") %>%
  layout(barmode = "overlay"
         ,xaxis = list(type = "date"
                       ,tickformat = "%B %Y")
         ,legend = list(x = .6, y = 1))
# End Timestamp
EndLoop = Sys.time()

Matrix Method

Data will be structed into list, and then transformed into a matrix to determine the optimal set

# Start Timestamp
StartMatrix = Sys.time()

# Define random addition for number of months to hold lease for simulation
ExtensionReturns = function(x){
  ifelse(x == 0
         ,sample(c(0:6), 1, replace = TRUE, prob = c(.50,rep(1/6*.50, 6))) #Input will be 84-84 to be a zero
         ,sample(c(0,1:x), 1, replace = TRUE, prob = c(.50,rep(1/x*.50, x))) #Input will be 84-x, where x is <84 so it will be >1 response
  )
}

# Define new leases for unit swaps
NewLeases = function(){
  # Lease Options: 60, 66, 72, 78, 84
  # We'll try to keep the options low, so .05 for each besides 84
  sample(c(60, 66, 72, 78, 84), 1, replace = TRUE, prob = c(.05,.05,.05,.05,.8))
}

# Create simulation
Simulation = function(){
  
  # Repeat units, lease ends, and terms as vector
  UnitList = rep(Data$UnitNumber, nSims)
  InitialLeaseEnd = rep(Data$LeaseEndFix, nSims)
  Terms = rep(Data$Term, nSims)
  
  #Generate Extensions
  SimulationSet = tibble(InitialLeaseEnd, Terms)
  
  # Create extensions, new end dates, new leases, and second end dates
  SimulationSet = SimulationSet %>% 
    rowwise() %>%
    mutate(
      Extensions = ExtensionReturns(84-Terms)
    )
  
  NewEnd = SimulationSet$InitialLeaseEnd %m+% months(SimulationSet$Extensions)
  
  NewLeasesTerms = sample(c(60, 66, 72, 78, 84), nSims*Units, replace = TRUE, prob = c(.05,.05,.05,.05,.8))
          
  NewLeaseEnd = NewEnd %m+% months(NewLeasesTerms)
    
  SimulationSet = tibble(UnitList, InitialLeaseEnd, Terms, NewEnd, NewLeasesTerms, NewLeaseEnd)
    
  return(SimulationSet)
}

# Simulation run
Results = as_tibble(Simulation())

# Put each simulation into a matrix to review results
SecondEndResults = as.data.frame(matrix(Results$NewLeaseEnd, nrow = Units, ncol = nSims))
SecondEndResults = SecondEndResults %>%
                      mutate_all(as_date)

# Find variance to determine best model
TableVariance = function(x) {
  y = table(x)
  z = var(y)
  return(z)
}

VarianceList = apply(SecondEndResults, 2, TableVariance)

BestSim = match(min(VarianceList),VarianceList)

BestSimStart = (BestSim-1) * Units + 1 
BestSimEnd = (BestSim) * Units

Results = Results[BestSimStart:BestSimEnd,]

# Results[BestSimStart,] #08N107
# Results[BestSimEnd,] #08N1325
# var(table(x$NewLeaseEnd))

# Cycle review
plot_ly(alpha = .6) %>%
  add_histogram(x = Data$SecondEnd, name = "Second Returns") %>%
  add_histogram(x = Results$NewLeaseEnd, name = "Model 2") %>%
  layout(barmode = "overlay"
         ,xaxis = list(type = "date"
                       ,tickformat = "%B %Y")
         ,legend = list(x = .6, y = 1))
# End Timestamp
EndMatrix = Sys.time()

Comparison

EndLoop - StartLoop
## Time difference of 6.543715 mins
BestRunModel1
## [1] 102.191
EndMatrix - StartMatrix
## Time difference of 5.351898 mins
var(table(Results$NewLeaseEnd))
## [1] 101.3339